library(data.table)

setwd("/mnt/md1200/6/yjp/5hmc_analysis_hg19_new/20201207")
file=read.csv("at.least.one.AShM.in.DC.add.BF.beta0.add.CCHC.csv",head=T)
file$id=paste(file$Chr,file$Start,sep=":")
gwas.scz1=fread("/mnt/md1200/5/zhaocunyou/wangzhongju/GWAS_data_by_wangzhongju/GWAS/ckqny.scz2snpres",header=T,sep="\t")#精分的GWAS data
gwas.scz1=gwas.scz1[gwas.scz1$p<0.05,]
gwas.scz1$id=paste(gwas.scz1$hg19chrc,gwas.scz1$bp,sep=":")
gwas.scz1=data.frame(gwas.scz1$id,gwas.scz1$p,group.gwas=rep("gwas.ckqny.scz",dim(gwas.scz1)[1]))

gwas.scz2=fread("/mnt/md1200/5/zhaocunyou/wangzhongju/GWAS_data_by_wangzhongju/GWAS/clozuk_pgc2.meta.sumstats.ma",header=T,sep="\t")#精分的GWAS data
gwas.scz2=gwas.scz2[gwas.scz2$p<0.05,]
gwas.scz2$chr=paste("chr",gwas.scz2$chr,sep="")
gwas.scz2$id=paste(gwas.scz2$chr,gwas.scz2$pos,sep=":")
gwas.scz2=data.frame(gwas.scz2$id,gwas.scz2$p,group.gwas=rep("gwas.clozuk_pgc2.scz",dim(gwas.scz2)[1]))

gwas.pgc3.scz=fread("/mnt/md1200/6/yjp/5hmc_analysis_hg19_new/20210113/PGC3_SCZ_wave3_public.v2.tsv",header=T,sep="\t",fill=T,quote="")#PGC3.0精分的GWAS data
gwas.pgc3.scz=gwas.pgc3.scz[gwas.pgc3.scz$P<0.05,]
gwas.pgc3.scz$CHR=paste("chr",gwas.pgc3.scz$CHR,sep="")
gwas.pgc3.scz$id=paste(gwas.pgc3.scz$CHR,gwas.pgc3.scz$BP,sep=":")
gwas.pgc3.scz=data.frame(gwas.pgc3.scz$id,gwas.pgc3.scz$P,group.gwas=rep("gwas.pgc3.scz",dim(gwas.pgc3.scz)[1]))

gwas.bd=fread("/mnt/md1200/5/zhaocunyou/wangzhongju/GWAS_data_by_wangzhongju/GWAS/daner_PGC_BIP32b_mds7a_0416a",header=T,sep="\t")#双相的GWAS data
gwas.bd=gwas.bd[gwas.bd$P<0.05,]
gwas.bd$CHR=paste("chr",gwas.bd$CHR,sep="")
gwas.bd$id=paste(gwas.bd$CHR,gwas.bd$BP,sep=":")
gwas.bd=data.frame(gwas.bd$id,gwas.bd$P,group.gwas=rep("gwas.PGC.BPD",dim(gwas.bd)[1]))

names(gwas.scz1)=c("id","gwas.ckqny.scz.pvalue","group.gwas.ckqny.scz")
names(gwas.scz2)=c("id","gwas.clozuk_pgc2.scz.pvalue","group.gwas.clozuk_pgc2.scz")
names(gwas.pgc3.scz)=c("id","gwas.pgc3.scz.pvalue","group.gwas.pgc3.scz")
names(gwas.bd)=c("id","gwas.bd.pvalue","group.gwas.bd")

file=merge(file,gwas.scz1,by="id",all.x=T)
file=merge(file,gwas.scz2,by="id",all.x=T)
file=merge(file,gwas.pgc3.scz,by="id",all.x=T)
file=merge(file,gwas.bd,by="id",all.x=T)

coln=names(file)
sel=coln[grep(coln,pattern="gwas")][seq(2,8,2)]

file$gwas.no.na.num=rowSums(!is.na(file[,sel]),na.rm=T)
write.table(file,"53K.add.GWAS.txt",quote=F,row.names=F,sep="\t")

LIBD=read.csv("53K.LIBD.eQTL.merge.csv",head=T)			###eQTL数据合并
LIBD=LIBD[,-1]
names(LIBD)=c("id","LIBD.eQTL.tissue")
eqtl=fread("/mnt/md1200/5/zhaocunyou/wangzhongju/eQTL_analysis_by_wangzhongju/eQTL_data/GTEx_v6p_eQTL_for_lqy.txt",header=T,sep="\t")
eqtl1=tidyr::separate(eqtl,SNP,into=c("Chr","Pos","Alt","Ref","Other"),sep="_")
eqtl1$Chr=paste0("chr",eqtl1$Chr)
eqtl1$id=paste(eqtl1$Chr,eqtl1$Pos,sep=":")
gtx=data.frame(id=eqtl1$id,GTx.p.value=eqtl1$P,GTx.eQTL.tissue=eqtl1$tissue)

file=merge(file,LIBD,by="id",all.x=T)
file=merge(file,gtx,by="id",all.x=T)

coln=names(file)
sel=coln[grep(coln,pattern="eQTL")]
file$eQTL.no.na.num=rowSums(!is.na(file[,sel]),na.rm=T)

result=file
str1=unique(as.character(result$id))

rt=result[1,]		#合并Gtx.tissue那一列为一行
rt=rt[-1,]
for(j in 1:length(str1)){
  tmp=result[result$id==str1[j],]
  tmp1=tmp[1,]
  tmp1$group.LIBD=paste(tmp$GTx.eQTL.tissue,collapse=";")
  rt=rbind(rt,tmp1)
}

write.table(file,"53K.add.GWAS.eQTL.txt",quote=F,row.names=F,sep="\t")

setwd("E:/5hmc_file/2_5hmc_yjp_bam/ASM/20210316LIBD.eQTL处理")
file=read.table("53K.add.GWAS.eQTL.txt",header = T,sep="\t")

result=file
str1=unique(as.character(result$id))

rt=result[1,]
rt=rt[-1,]
for(j in 1:length(str1)){
  tmp=result[result$id==str1[j],]
  tmp1=tmp[1,]
  tmp1$GTx.eQTL.tissue=paste(tmp$GTx.eQTL.tissue,collapse=";")
  rt=rbind(rt,tmp1)
}
write.table(rt,"53K.add.GWAS.eQTL.txt",quote=F,row.names = F,sep="\t")

sci=read.csv("E:/0 公共数据库差异情况/2018 Science supp/aat8127_Table_S1a_DGE.csv",header=T)		###DEG数据合并
sci_scz=sci[sci$SCZ.fdr<0.1,]
sci_bd=sci[sci$BD.fdr<0.1,]
sci_scz=data.frame(symbol=sci_scz$gene_name,SCZ.log2FC=sci_scz$SCZ.log2FC,SCZ.2018sci.fdr=sci_scz$SCZ.fdr)
sci_bd=data.frame(symbol=sci_bd$gene_name,BD.log2FC=sci_bd$BD.log2FC,BD.2018sci.fdr=sci_bd$BD.fdr)
#sci_scz$group.2018sci.scz="SCZ"
#sci_bd$group.2018sci.BD="BD"

cmc.nosva=read.xlsx("E:/0 公共数据库差异情况/CMC/DEG_nosva.xlsx",sheet=1)
cmc.nosva=cmc.nosva[cmc.nosva$adj.P.Val<0.1,]
cmc.nosva=cmc.nosva[!cmc.nosva$MAPPED_genes==".",]
cmc.nosva=data.frame(symbol=cmc.nosva$MAPPED_genes,cmc.logFC=cmc.nosva$logFC,cmc.nosva.fdr=cmc.nosva$adj.P.Val)
cmc.nosva$cmc.nosva.group="CMC.nosva"

deg=merge(sci_scz,sci_bd,by="symbol",all=T)
deg=merge(deg,cmc.nosva,by="symbol",all=T)


coln=names(deg)
sel=coln[grep(coln,pattern = "fdr")]
deg$DEG.no.na.num=rowSums(deg[,sel]>0,na.rm = T)
write.csv(deg,"E:/5hmc_file/2_5hmc_yjp_bam/ASM/20210316LIBD.eQTL处理/DEG.csv",quote=F,row.names = F)

test=rt
test=tidyr::separate_rows(test,Gene.refGene,sep=";")
test=merge(test,deg,by.x="Gene.refGene",by.y = "symbol",all.x = T)
test[is.na(test$DEG.no.na.num),]$DEG.no.na.num=0

result=test
str1=unique(as.character(result$id))

rt=result[1,]
rt=rt[-1,]
for(j in 1:length(str1)){
  tmp=result[result$id==str1[j],]
  tmp1=tmp[1,]
  tmp1$Gene.refGene=paste(tmp$Gene.refGene,collapse=";")
  rt=rbind(rt,tmp1)
}
write.table(rt,"53K.add.GWAS.eQTL.DEG.txt",quote=F,row.names = F,sep="\t")
motif=read.table("E:/5hmc_file/2_5hmc_yjp_bam/ASM/20210103.motif.analysis/117K.ASH.motif.predict.txt",head=T,sep="\t")
file=rt
motif=read.table("E:/5hmc_file/2_5hmc_yjp_bam/ASM/20210103.motif.analysis/117K.ASH.motif.predict.txt",head=T,sep="\t")
motif$id=paste(motif$seqnames,motif$snpPos,sep=":")
motif$id2=paste(motif$id,motif$geneSymbol,sep="_")
motif=motif[!duplicated(motif$id2),]

motifdata=data.frame(id=motif$id,geneSymbol=motif$geneSymbol,alleleRef=motif$alleleRef,alleleAlt=motif$alleleAlt,scoreRef=motif$scoreRef,scoreAlt=motif$scoreAlt,motif$effect)
motifdata=motifdata[motifdata$id %in% as.character(rt$id),]

str1=unique(motifdata$id)
rt=motifdata[1,]
rt=rt[-1,]
for(j in 1:length(str1)){
  tmp=motifdata[motifdata$id==str1[j],]
  tmp1=tmp[1,]
  tmp1$geneSymbol=paste(tmp$geneSymbol,collapse=";")
  tmp1$alleleRef=paste(tmp$alleleRef,collapse=";")
  tmp1$alleleAlt=paste(tmp$alleleAlt,collapse=";")
  tmp1$scoreRef=paste(tmp$scoreRef,collapse=";")
  tmp1$scoreAlt=paste(tmp$scoreAlt,collapse=";")
  tmp1$motif.effect=paste(tmp$motif.effect,collapse=";")
  rt=rbind(rt,tmp1)
}


test=merge(file,rt,by="id",all.x=T)
write.table(test,"53K.add.GWAS.eQTL.DEG.motif.for.wenzhang.txt",quote=F,row.names = F,sep="\t")	###文章用

file=read.table("53K.add.GWAS.eQTL.DEG.txt",head=T,sep="\t")
file=merge(file,motif,by="id",all.x=T)
write.table(file,"53K.add.GWAS.eQTL.DEG.motif.for.analysis.txt",quote=F,row.names = F,sep="\t")	###自己分析用